[SLPVectorizer] Support SLPVectorizer cases of tan across all backends #95517

farzonl · 2024-06-14T08:52:39Z

This PR is intended to address the limited SLPVectorizer support of tan raised in the comments of this PR: #94559.

Right now, emitting the tan intrinsic allows you to vectorize tan, but emitting the libfunc does not. To address this, the libcall needs to be mapped to the intrinsic, and the libcall and function name need to be marked appropriately so they can be optimized or defined as a call lowering.

llvmbot · 2024-06-14T08:53:09Z

@llvm/pr-subscribers-backend-webassembly
@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Farzon Lotfi (farzonl)

Changes

Add a default f16 type promotion

Full diff: https://github.com/llvm/llvm-project/pull/95517.diff

10 Files Affected:

(modified) llvm/include/llvm/Analysis/TargetLibraryInfo.h (+3)
(modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+7-4)
(modified) llvm/lib/Analysis/ValueTracking.cpp (+4)
(modified) llvm/lib/CodeGen/TargetLoweringBase.cpp (+2-1)
(modified) llvm/test/CodeGen/RISCV/half-intrinsics.ll (+120)
(modified) llvm/test/CodeGen/WebAssembly/simd-unsupported.ll (+16)
(modified) llvm/test/Transforms/LoopVectorize/intrinsic.ll (+54)
(modified) llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll (+5-7)
(modified) llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll (+5-7)
(modified) llvm/test/Transforms/SLPVectorizer/X86/call.ll (+19)

diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index f5da222d11f55..ce0cc38baf77e 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -415,10 +415,12 @@ class TargetLibraryInfo {
       return false;
     switch (F) {
     default: break;
+    // clang-format off
     case LibFunc_copysign:     case LibFunc_copysignf:  case LibFunc_copysignl:
     case LibFunc_fabs:         case LibFunc_fabsf:      case LibFunc_fabsl:
     case LibFunc_sin:          case LibFunc_sinf:       case LibFunc_sinl:
     case LibFunc_cos:          case LibFunc_cosf:       case LibFunc_cosl:
+    case LibFunc_tan:          case LibFunc_tanf:       case LibFunc_tanl:
     case LibFunc_sqrt:         case LibFunc_sqrtf:      case LibFunc_sqrtl:
     case LibFunc_sqrt_finite:  case LibFunc_sqrtf_finite:
                                                    case LibFunc_sqrtl_finite:
@@ -437,6 +439,7 @@ class TargetLibraryInfo {
     case LibFunc_memcmp:       case LibFunc_bcmp:       case LibFunc_strcmp:
     case LibFunc_strcpy:       case LibFunc_stpcpy:     case LibFunc_strlen:
     case LibFunc_strnlen:      case LibFunc_memchr:     case LibFunc_mempcpy:
+    // clang-format on
       return true;
     }
     return false;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 7828bdc1f1f43..b1d426830f0da 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -156,14 +156,17 @@ class TargetTransformInfoImplBase {
     StringRef Name = F->getName();
 
     // These will all likely lower to a single selection DAG node.
+    // clang-format off
     if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
-        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+        Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
         Name == "fmin" || Name == "fminf" || Name == "fminl" ||
         Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
-        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
-        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+        Name == "sin"  || Name == "sinf"  || Name == "sinl"  || 
+        Name == "cos"  || Name == "cosf"  || Name == "cosl"  || 
+        Name == "tan"  || Name == "tanf"  || Name == "tanl"  || 
+        Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
       return false;
-
+    // clang-format on
     // These are all likely to be optimized into something smaller.
     if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
         Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8126d2a1acc27..0a477082a7d12 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3993,6 +3993,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
   case LibFunc_cosf:
   case LibFunc_cosl:
     return Intrinsic::cos;
+  case LibFunc_tan:
+  case LibFunc_tanf:
+  case LibFunc_tanl:
+    return Intrinsic::tan;
   case LibFunc_exp:
   case LibFunc_expf:
   case LibFunc_expl:
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8240a1fd7e2ff..de534994fa48c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() {
       setOperationAction(
           {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
            ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
-           ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
+           ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN},
           VT, Expand);
 
       // Constrained floating-point operations default to expand.
@@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() {
                       ISD::FTAN},
                      {MVT::f32, MVT::f64, MVT::f128}, Expand);
 
+  setOperationAction(ISD::FTAN, MVT::f16, Promote);
   // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
 
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index c493a9b2cb1df..bfc26b0d65980 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) {
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3)  ; nan
   ret i1 %1
 }
+
+declare half @llvm.tan.f16(half)
+
+define half @tan_f16(half %a) nounwind {
+; RV32IZFH-LABEL: tan_f16:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call tanf
+; RV32IZFH-NEXT:    fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: tan_f16:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    addi sp, sp, -16
+; RV64IZFH-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT:    call tanf
+; RV64IZFH-NEXT:    fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT:    addi sp, sp, 16
+; RV64IZFH-NEXT:    ret
+;
+; RV32IZHINX-LABEL: tan_f16:
+; RV32IZHINX:       # %bb.0:
+; RV32IZHINX-NEXT:    addi sp, sp, -16
+; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT:    fcvt.s.h a0, a0
+; RV32IZHINX-NEXT:    call tanf
+; RV32IZHINX-NEXT:    fcvt.h.s a0, a0
+; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT:    addi sp, sp, 16
+; RV32IZHINX-NEXT:    ret
+;
+; RV64IZHINX-LABEL: tan_f16:
+; RV64IZHINX:       # %bb.0:
+; RV64IZHINX-NEXT:    addi sp, sp, -16
+; RV64IZHINX-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINX-NEXT:    fcvt.s.h a0, a0
+; RV64IZHINX-NEXT:    call tanf
+; RV64IZHINX-NEXT:    fcvt.h.s a0, a0
+; RV64IZHINX-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINX-NEXT:    addi sp, sp, 16
+; RV64IZHINX-NEXT:    ret
+;
+; RV32I-LABEL: tan_f16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    call __extendhfsf2
+; RV32I-NEXT:    call tanf
+; RV32I-NEXT:    call __truncsfhf2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: tan_f16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    call __extendhfsf2
+; RV64I-NEXT:    call tanf
+; RV64I-NEXT:    call __truncsfhf2
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV32IZFHMIN-LABEL: tan_f16:
+; RV32IZFHMIN:       # %bb.0:
+; RV32IZFHMIN-NEXT:    addi sp, sp, -16
+; RV32IZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFHMIN-NEXT:    call tanf
+; RV32IZFHMIN-NEXT:    fcvt.h.s fa0, fa0
+; RV32IZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT:    addi sp, sp, 16
+; RV32IZFHMIN-NEXT:    ret
+;
+; RV64IZFHMIN-LABEL: tan_f16:
+; RV64IZFHMIN:       # %bb.0:
+; RV64IZFHMIN-NEXT:    addi sp, sp, -16
+; RV64IZFHMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFHMIN-NEXT:    fcvt.s.h fa0, fa0
+; RV64IZFHMIN-NEXT:    call tanf
+; RV64IZFHMIN-NEXT:    fcvt.h.s fa0, fa0
+; RV64IZFHMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFHMIN-NEXT:    addi sp, sp, 16
+; RV64IZFHMIN-NEXT:    ret
+;
+; RV32IZHINXMIN-LABEL: tan_f16:
+; RV32IZHINXMIN:       # %bb.0:
+; RV32IZHINXMIN-NEXT:    addi sp, sp, -16
+; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINXMIN-NEXT:    fcvt.s.h a0, a0
+; RV32IZHINXMIN-NEXT:    call tanf
+; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
+; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
+; RV32IZHINXMIN-NEXT:    ret
+;
+; RV64IZHINXMIN-LABEL: tan_f16:
+; RV64IZHINXMIN:       # %bb.0:
+; RV64IZHINXMIN-NEXT:    addi sp, sp, -16
+; RV64IZHINXMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZHINXMIN-NEXT:    fcvt.s.h a0, a0
+; RV64IZHINXMIN-NEXT:    call tanf
+; RV64IZHINXMIN-NEXT:    fcvt.h.s a0, a0
+; RV64IZHINXMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZHINXMIN-NEXT:    addi sp, sp, 16
+; RV64IZHINXMIN-NEXT:    ret
+  %1 = call half @llvm.tan.f16(half %a)
+  ret half %1
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
index d214a3af5a151..1d6e073271efa 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
@@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) {
   ret <4 x float> %v
 }
 
+; CHECK-LABEL: tan_v4f32:
+; CHECK: call $push[[L:[0-9]+]]=, tanf
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+define <4 x float> @tan_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
 ; CHECK-LABEL: powi_v4f32:
 ; CHECK: call $push[[L:[0-9]+]]=, __powisf2
 declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
@@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) {
   ret <2 x double> %v
 }
 
+; CHECK-LABEL: tan_v2f64:
+; CHECK: call $push[[L:[0-9]+]]=, tan
+declare <2 x double> @llvm.tan.v2f64(<2 x double>)
+define <2 x double> @tan_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
 ; CHECK-LABEL: powi_v2f64:
 ; CHECK: call $push[[L:[0-9]+]]=, __powidf2
 declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 0f070347dd4ef..9c910d70807a1 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -162,6 +162,60 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.cos.f64(double)
 
+define void @tan_f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: llvm.tan.v4f32
+; CHECK: ret void
+;
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %call = tail call float @llvm.tan.f32(float %0)
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+  store float %call, ptr %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.tan.f32(float)
+
+define void @tan_f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: llvm.tan.v4f64
+; CHECK: ret void
+;
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %call = tail call double @llvm.tan.f64(double %0)
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
+  store double %call, ptr %arrayidx2, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.tan.f64(double)
+
 define void @exp_f32(i32 %n, ptr %y, ptr %x) {
 ; CHECK-LABEL: @exp_f32(
 ; CHECK: llvm.exp.v4f32
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
index 6db27e597a63f..eae38295ba08c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
-; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
-; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 24e16deacb3af..5e2dd305f0557 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
-; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
-; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 4181148a4d829..8835e3b144be6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -6,6 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 declare double @sin(double) nounwind willreturn
 declare double @cos(double) nounwind willreturn
+declare double @tan(double) nounwind willreturn
 declare double @pow(double, double) nounwind willreturn
 declare double @exp2(double) nounwind willreturn
 declare double @sqrt(double) nounwind willreturn
@@ -48,6 +49,24 @@ define void @cos_libm(ptr %a, ptr %b) {
   ret void
 }
 
+define void @tan_libm(ptr %a, ptr %b) {
+; CHECK-LABEL: @tan_libm(
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
+  %tan1 = tail call double @tan(double %a0) nounwind readnone
+  %tan2 = tail call double @tan(double %a1) nounwind readnone
+  store double %tan1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %tan2, ptr %idx2, align 8
+  ret void
+}
+
 define void @pow_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @pow_libm(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8

github-actions · 2024-06-14T08:55:24Z

✅ With the latest revision this PR passed the C/C++ code formatter.

nikic

Split the backend changes into a separate PR.

Add a default f16 type promotion

farzonl · 2024-06-17T19:22:28Z

@nikic are there any other changes you want?

farzonl · 2024-06-19T19:53:26Z

@alexey-bataev if you have time could you take a look at this PR?

alexey-bataev · 2024-06-19T19:54:48Z

On a PTO, will return next week

alexey-bataev · 2024-06-26T12:51:34Z

Can we have some more tests for other targets too? Would be good to have a test for a target that does not support vectorization of tan

farzonl · 2024-06-26T18:12:13Z

Can we have some more tests for other targets too? Would be good to have a test for a target that does not support vectorization of tan

The tan LLVM intrinsic was defined in isTriviallyVectorizable:

llvm-project/llvm/lib/Analysis/VectorUtils.cpp

Line 71 in 8ab6677

case Intrinsic::tan:

This change is just linking the tan libfuncs to the intrinsic so the libfuncs can also be vectorized. As written there is no backend specific behavior. It is going to vectorize tan across all backends. In other words there doesn't appear to be any way to mark a libfunc or an intrinsic as isTriviallyVectorizable for a subset of backends, it is all or nothing. So I don't know how the test case you have asked for makes sense.

alexey-bataev · 2024-06-26T18:31:30Z

Can we have some more tests for other targets too? Would be good to have a test for a target that does not support vectorization of tan

Can we have some more tests for other targets too? Would be good to have a test for a target that does not support vectorization of tan

The tan LLVM intrinsic was defined in isTriviallyVectorizable:

llvm-project/llvm/lib/Analysis/VectorUtils.cpp

Line 71 in 8ab6677

case Intrinsic::tan:

This change is just linking the tan libfuncs to the intrinsic so the libfuncs can also be vectorized. As written there is no backend specific behavior. It is going to vectorize tan across all backends. In other words there doesn't appear to be any way to mark a libfunc or an intrinsic as isTriviallyVectorizable for a subset of backends, it is all or nothing. So I don't know how the test case you have asked for makes sense.

Need to be sure that the cost of the vector version is high enough for the targets that do not support it. Otherwise, they may suffer from the perf drop. And still need to add the test, especially codegen (if still not) to be sure that the targets can lower the vector versions correctly.

farzonl · 2024-06-26T21:29:49Z

Need to be sure that the cost of the vector version is high enough for the targets that do not support it. Otherwise, they may suffer from the perf drop. And still need to add the test, especially codegen (if still not) to be sure that the targets can lower the vector versions correctly.

I'm not familiar with a way to check for perf impact across all backends. Could you give some guidance on how I could figure that out to answer this question?

I'm not familiar enough with most of these backends to know which ones need tests. Trig functions seem to only be tested on RISCV, AArch64, and x86. I'll do a deep dive below on the state of things and then maybe you can give me some expectations on which backends you would like to see tests for.

As for which backends support vectorization, I'm going to assume we can limit backends to what exists in llvm/test/Transforms/SLPVectorizer
That would be
AArch64/ AMDGPU/ ARM/ NVPTX/ PowerPC/ RISCV/ SystemZ/ VE/ WebAssembly/ X86/ XCore/

I think a partial list can be figured out from here:

llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines 124 to 134 in 62d5393

    
           enum VectorLibrary { 
        
             NoLibrary,        // Don't use any vector library. 
        
             Accelerate,       // Use Accelerate framework. 
        
             DarwinLibSystemM, // Use Darwin's libsystem_m. 
        
             LIBMVEC_X86,      // GLIBC Vector Math library. 
        
             MASSV,            // IBM MASS vector library. 
        
             SVML,             // Intel short vector math library. 
        
             SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions. 
        
             ArmPL,       // Arm Performance Libraries. 
        
             AMDLIBM      // AMD Math Vector library. 
        
           };

Accelerate framework (Apple?, x86?, aarch64?)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 51 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
DARWIN_LIBSYSTEM (MacOS? x86, aarch64)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 97 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_simd_tan_f4", FIXED(4), "_ZGV_LLVM_N4v")

Libm X86\X86_64

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Lines 154 to 155 in 62d5393

    
           TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v") 
        
           TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")

IBM MASSV PowerPC

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 264 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "__tanf4", FIXED(4), "_ZGV_LLVM_N4v")

SVML (x86\X86_64)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Lines 328 to 330 in 62d5393

    
           TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v") 
        
           TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v") 
        
           TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")

SLEEF

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 689 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
SLEEF Scalable

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 837 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv")

All SLEEF is aarch64 only:

llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines 1295 to 1303 in 62d5393

    
           case SLEEFGNUABI: { 
        
             switch (TargetTriple.getArch()) { 
        
             default: 
        
               break; 
        
             case llvm::Triple::aarch64: 
        
             case llvm::Triple::aarch64_be: 
        
               addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF2); 
        
               addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF4); 
        
               addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalable);

ARMPL

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 1095 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

AMD Libm (x86\x86_64)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Lines 1279 to 1281 in 62d5393

    
           TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") 
        
           TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") 
        
           TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

So that leads me to believe there is vectorization support on x86, x86_64, arm, aarch64, and PowerPC.

There also might be some RISCV support based on what I found for sinf

llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll

Line 356 in 62d5393

declare float @sinf(float) readonly nounwind willreturn

So if we subtract what we know supports vectorization from the full list of tests we are left with AMDGPU, NVPTX, SystemZ, VE, WebAssembly, and XCore.

PTX has a f16x2:
https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-tan
AMDGPU: I can't find any explicit support even for scalar tan; however, HIP/ROCm supports on-device tan operations, so maybe I'm not looking in the right places.
https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html
https://rocm.docs.amd.com/projects/HIP/en/latest/doxygen/html/group___math_float.html#ga0a27f2dd7ba6f1aa7c088f6e66b5e6b3
SystemZ does not have vectorization for any trig operations,
which causes this bug in vectorized cosine: "LLVM floating-point math intrinsics fail on s390x-unknown-linux-gnu" (rust-lang/packed_simd#14).
It does have vectorization support for other ISD operations on a per-subtarget basis:

llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Line 559 in a54704d

if (Subtarget.hasVector()) {
XCore: I can't find an ISA, but there does appear to be scalar tan support: https://github.com/xmos/lib_xcore_math/blob/ca3161ff6f65f240bb3d022673e4b04b82ec63b7/doc/programming_guide/src/reference/scalar/csv/scalar_fixed_point_ops.csv#L5

WebAssembly appears to handle this by scalarizing:

llvm-project/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll

Lines 380 to 386 in a54704d

    
           ; CHECK-LABEL: tan_v4f32: 
        
           ; CHECK: call $push[[L:[0-9]+]]=, tanf 
        
           declare <4 x float> @llvm.tan.v4f32(<4 x float>) 
        
           define <4 x float> @tan_v4f32(<4 x float> %x) { 
        
             %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) 
        
             ret <4 x float> %v 
        
           }

VE: I can't find a published document on the Vector Engine ISA.

llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp

Lines 247 to 255 in a54704d

    
           for (MVT VT : MVT::fp_valuetypes()) { 
        
             setOperationAction(ISD::FABS, VT, Expand); 
        
             setOperationAction(ISD::FCOPYSIGN, VT, Expand); 
        
             setOperationAction(ISD::FCOS, VT, Expand); 
        
             setOperationAction(ISD::FMA, VT, Expand); 
        
             setOperationAction(ISD::FPOW, VT, Expand); 
        
             setOperationAction(ISD::FSIN, VT, Expand); 
        
             setOperationAction(ISD::FSQRT, VT, Expand); 
        
           }

, but it is only handling the scalar cases for sin\cos.

One thing of note I discovered is that sin\cos are SLP-vectorized despite the fact that some of these backends do not support it. That makes me wonder whether sin\cos vectorization should be removed, or whether it is OK that tan can be vectorized even if it lacks support across all backends.

alexey-bataev · 2024-06-27T12:44:05Z

Need to be sure that the cost of the vector version is high enough for the targets that do not support it. Otherwise, they may suffer from the perf drop. And still need to add the test, especially codegen (if still not) to be sure that the targets can lower the vector versions correctly.

I'm not familiar with a way to check for perf impact across all backends. Could you give some guidance on how I could figure that out to answer this question?

I'm not familiar enough with most of these backends to know which ones need tests. Trig functions seem to only be tested on RISCV, AArch64, and x86. I'll do a deep dive below on the state of things and then maybe you can give me some expectations on which backends you would like to see tests for.

As for which backends support vectorization, I'm going to assume we can limit backends to what exists in llvm/test/Transforms/SLPVectorizer That would be AArch64/ AMDGPU/ ARM/ NVPTX/ PowerPC/ RISCV/ SystemZ/ VE/ WebAssembly/ X86/ XCore/

I think a partial list can be figured out from here:

llvm-project/llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines 124 to 134 in 62d5393

enum VectorLibrary {

NoLibrary, // Don't use any vector library.

Accelerate, // Use Accelerate framework.

DarwinLibSystemM, // Use Darwin's libsystem_m.

LIBMVEC_X86, // GLIBC Vector Math library.

MASSV, // IBM MASS vector library.

SVML, // Intel short vector math library.

SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.

ArmPL, // Arm Performance Libraries.

AMDLIBM // AMD Math Vector library.

};

Accelerate framework (Apple?, x86?, aarch64?)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 51 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")

DARWIN_LIBSYSTEM (MacOS? x86, aarch64)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 97 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_simd_tan_f4", FIXED(4), "_ZGV_LLVM_N4v")

Libm X86\X86_64

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Lines 154 to 155 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")

IBM MASSV PowerPC

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 264 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "__tanf4", FIXED(4), "_ZGV_LLVM_N4v")

SVML (x86\X86_64)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Lines 328 to 330 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")

SLEEF

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 689 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")

SLEEF Scalable

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 837 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv")

All SLEEF is aarch64 only:

llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines 1295 to 1303 in 62d5393

case SLEEFGNUABI: {

switch (TargetTriple.getArch()) {

default:

break;

case llvm::Triple::aarch64:

case llvm::Triple::aarch64_be:

addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF2);

addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF4);

addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalable);

ARMPL

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Line 1095 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

AMD Libm (x86\x86_64)

llvm-project/llvm/include/llvm/Analysis/VecFuncs.def

Lines 1279 to 1281 in 62d5393

TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

So that leads me to believe there is vectorization support on x86, x86_64, arm, aarch64, and PowerPC.

There also might be some RISCV support based on what I found for sinf

llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll

Line 356 in 62d5393

declare float @sinf(float) readonly nounwind willreturn

So if we subtract what we know supports vectorization from the full list of tests we are left with AMDGPU, NVPTX, SystemZ, VE, WebAssembly, and XCore.

PTX has a f16x2:
https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-tan

AMDGPU: I can't find any explicit support even for scalar tan; however, HIP\ROCm supports on-device tan operations, so maybe I'm not looking in the right places.
https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html
https://rocm.docs.amd.com/projects/HIP/en/latest/doxygen/html/group___math_float.html#ga0a27f2dd7ba6f1aa7c088f6e66b5e6b3

SystemZ does not have vectorization for any trig operations
which causes this bug in vectorized cosine: LLVM floating-point math intrinsics fail on s390x-unknown-linux-gnu rust-lang/packed_simd#14
It does have vectorization support for other ISD operations on a sub target basis:

llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Line 559 in a54704d

if (Subtarget.hasVector()) {

XCore: I can't find an ISA document, but there does appear to be scalar tan support: https://github.com/xmos/lib_xcore_math/blob/ca3161ff6f65f240bb3d022673e4b04b82ec63b7/doc/programming_guide/src/reference/scalar/csv/scalar_fixed_point_ops.csv#L5

WebAssembly looks to handle this by scalarizing:

llvm-project/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll

Lines 380 to 386 in a54704d

; CHECK-LABEL: tan_v4f32:

; CHECK: call $push[[L:[0-9]+]]=, tanf

declare <4 x float> @llvm.tan.v4f32(<4 x float>)

define <4 x float> @tan_v4f32(<4 x float> %x) {

%v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)

ret <4 x float> %v

}

VE: I can't find a published document on the Vector Engine ISA.

llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp

Lines 247 to 255 in a54704d

for (MVT VT : MVT::fp_valuetypes()) {

setOperationAction(ISD::FABS, VT, Expand);

setOperationAction(ISD::FCOPYSIGN, VT, Expand);

setOperationAction(ISD::FCOS, VT, Expand);

setOperationAction(ISD::FMA, VT, Expand);

setOperationAction(ISD::FPOW, VT, Expand);

setOperationAction(ISD::FSIN, VT, Expand);

setOperationAction(ISD::FSQRT, VT, Expand);

}

, but it is only handling the scalar cases for sin\cos.

So one thing of note I discovered is that sin\cos are SLP-vectorized despite the fact that some of these backends do not support it. That makes me wonder if sin\cos vectorization should be removed, or if it is OK that tan can be vectorized even if it lacks support across all backends.

I had a patch for this issue some time ago (https://reviews.llvm.org/D154738, see @RKSimon's response). We still need to support this; such nodes should not be vectorized. OK, let's keep it as is for now.

alexey-bataev

LG

farzonl · 2024-06-27T16:38:27Z

@nikic you are listed as code owner; are you happy with these changes as well?

nikic

LGTM

llvm-ci · 2024-06-27T19:45:20Z

LLVM Buildbot has detected a new failure on builder sanitizer-ppc64le-linux running on ppc64le-sanitizer while building llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/72/builds/520

Here is the relevant piece of the build log for the reference:

Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
PASS: ThreadSanitizer-Unit :: rtl/./TsanRtlTest-powerpc64le-Test/26/37 (729 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: printf-ldbl.c (730 of 2450)
PASS: SanitizerCommon-lsan-powerpc64le-Linux :: Posix/realpath.cpp (731 of 2450)
PASS: Profile-powerpc64le :: Posix/instrprof-visibility.cpp (732 of 2450)
PASS: SanitizerCommon-msan-powerpc64le-Linux :: Linux/aligned_alloc.c (733 of 2450)
PASS: SanitizerCommon-msan-powerpc64le-Linux :: malloc_hook.cpp (734 of 2450)
PASS: SanitizerCommon-lsan-powerpc64le-Linux :: Posix/access.cpp (735 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: Linux/name_to_handle_at.cpp (736 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: print-stack-trace.cpp (737 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: Posix/signal.cpp (738 of 2450)
FAIL: ThreadSanitizer-powerpc64le :: signal_block.cpp (739 of 2450)
******************** TEST 'ThreadSanitizer-powerpc64le :: signal_block.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 1: /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/./bin/clang  -fsanitize=thread -Wall  -m64 -fno-function-sections   -gline-tables-only -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/../ -O1 /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp &&  /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp 2>&1 | FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/./bin/clang -fsanitize=thread -Wall -m64 -fno-function-sections -gline-tables-only -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/../ -O1 /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
+ FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:59:15: error: CHECK-NOT: excluded string found in input
// CHECK-NOT: WARNING: ThreadSanitizer:
              ^
<stdin>:2:1: note: found here
WARNING: ThreadSanitizer: signal handler spoils errno (pid=3021677)
^~~~~~~~~~~~~~~~~~~~~~~~~

Input file: <stdin>
Check file: /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        1: ================== 
        2: WARNING: ThreadSanitizer: signal handler spoils errno (pid=3021677) 
not:59     !~~~~~~~~~~~~~~~~~~~~~~~~                                            error: no match expected
        3:  Signal 10 handler invoked at: 
        4:  #0 handler(int) /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:13 (signal_block.cpp.tmp+0xfea60) 
        5:  #1 thread(void*) /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:25:5 (signal_block.cpp.tmp+0xfebb0) 
        6:  
        7: SUMMARY: ThreadSanitizer: signal handler spoils errno /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:13 in handler(int) 
        8: ================== 
        9: DONE 
       10: ThreadSanitizer: reported 1 warnings 
>>>>>>

--

Step 11 (test compiler-rt debug) failure: test compiler-rt debug (failure)
...
PASS: ThreadSanitizer-Unit :: rtl/./TsanRtlTest-powerpc64le-Test/26/37 (729 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: printf-ldbl.c (730 of 2450)
PASS: SanitizerCommon-lsan-powerpc64le-Linux :: Posix/realpath.cpp (731 of 2450)
PASS: Profile-powerpc64le :: Posix/instrprof-visibility.cpp (732 of 2450)
PASS: SanitizerCommon-msan-powerpc64le-Linux :: Linux/aligned_alloc.c (733 of 2450)
PASS: SanitizerCommon-msan-powerpc64le-Linux :: malloc_hook.cpp (734 of 2450)
PASS: SanitizerCommon-lsan-powerpc64le-Linux :: Posix/access.cpp (735 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: Linux/name_to_handle_at.cpp (736 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: print-stack-trace.cpp (737 of 2450)
PASS: SanitizerCommon-tsan-powerpc64le-Linux :: Posix/signal.cpp (738 of 2450)
FAIL: ThreadSanitizer-powerpc64le :: signal_block.cpp (739 of 2450)
******************** TEST 'ThreadSanitizer-powerpc64le :: signal_block.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 1: /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/./bin/clang  -fsanitize=thread -Wall  -m64 -fno-function-sections   -gline-tables-only -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/../ -O1 /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp &&  /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp 2>&1 | FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/./bin/clang -fsanitize=thread -Wall -m64 -fno-function-sections -gline-tables-only -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/../ -O1 /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/tsan/POWERPC64LEConfig/Output/signal_block.cpp.tmp
+ FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp
/home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:59:15: error: CHECK-NOT: excluded string found in input
// CHECK-NOT: WARNING: ThreadSanitizer:
              ^
<stdin>:2:1: note: found here
WARNING: ThreadSanitizer: signal handler spoils errno (pid=3021677)
^~~~~~~~~~~~~~~~~~~~~~~~~

Input file: <stdin>
Check file: /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        1: ================== 
        2: WARNING: ThreadSanitizer: signal handler spoils errno (pid=3021677) 
not:59     !~~~~~~~~~~~~~~~~~~~~~~~~                                            error: no match expected
        3:  Signal 10 handler invoked at: 
        4:  #0 handler(int) /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:13 (signal_block.cpp.tmp+0xfea60) 
        5:  #1 thread(void*) /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:25:5 (signal_block.cpp.tmp+0xfebb0) 
        6:  
        7: SUMMARY: ThreadSanitizer: signal handler spoils errno /home/buildbots/llvm-external-buildbots/workers/ppc64le-sanitizer/sanitizer-ppc64le/build/llvm-project/compiler-rt/test/tsan/signal_block.cpp:13 in handler(int) 
        8: ================== 
        9: DONE 
       10: ThreadSanitizer: reported 1 warnings 
>>>>>>

--

llvm#95517) This PR is intended to address the limited SLPVectorizer support of tan raised in the comments of this PR: llvm#94559. Right now, emitting the tan intrinsic allows you to vectorize tan, but emitting the libfunc does not. To address this, the libcall needs to be mapped to the intrinsic, and the libcall and function name need to be marked appropriately so they can be optimized or defined as a call lowering.

farzonl requested review from davemgreen and efriedma-quic June 14, 2024 08:52

farzonl requested a review from nikic as a code owner June 14, 2024 08:52

llvmbot added backend:WebAssembly llvm:analysis llvm:transforms labels Jun 14, 2024

nikic requested changes Jun 14, 2024

View reviewed changes

farzonl mentioned this pull request Jun 14, 2024

[PowerPC] Mark llvm.tan on vectors as expand #95507

Closed

farzonl added 2 commits June 14, 2024 05:05

[CodeGen] Support SLPVectorizer cases of tan across all backends

3bbc260

Add a default f16 type promotion

remove target specific changes

62d5393

farzonl force-pushed the generic-vectorize-tan branch from 4e45379 to 62d5393 Compare June 14, 2024 09:13

farzonl removed the backend:WebAssembly label Jun 14, 2024

farzonl changed the title ~~[CodeGen] Support SLPVectorizer cases of tan across all backends~~ [SLPVectorizer] Support SLPVectorizer cases of tan across all backends Jun 14, 2024

farzonl requested a review from nikic June 14, 2024 09:22

farzonl mentioned this pull request Jun 14, 2024

[clang] Reland Add tanf16 builtin and support for tan constrained intrinsic #94559

Merged

nikic requested a review from alexey-bataev June 17, 2024 19:26

alexey-bataev approved these changes Jun 27, 2024

View reviewed changes

nikic approved these changes Jun 27, 2024

View reviewed changes

farzonl merged commit 918313d into llvm:main Jun 27, 2024
5 of 7 checks passed

farzonl deleted the generic-vectorize-tan branch June 28, 2024 06:48

[SLPVectorizer] Support SLPVectorizer cases of tan across all backends #95517

[SLPVectorizer] Support SLPVectorizer cases of tan across all backends #95517

Uh oh!

Conversation

farzonl commented Jun 14, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jun 14, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Jun 14, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

farzonl commented Jun 17, 2024

Uh oh!

farzonl commented Jun 19, 2024

Uh oh!

alexey-bataev commented Jun 19, 2024

Uh oh!

alexey-bataev commented Jun 26, 2024

Uh oh!

farzonl commented Jun 26, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

alexey-bataev commented Jun 26, 2024

Uh oh!

farzonl commented Jun 26, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

alexey-bataev commented Jun 27, 2024

Uh oh!

alexey-bataev left a comment

Choose a reason for hiding this comment

Uh oh!

farzonl commented Jun 27, 2024

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jun 27, 2024

Uh oh!

Uh oh!

farzonl commented Jun 14, 2024 •

edited

Loading

llvmbot commented Jun 14, 2024 •

edited

Loading

github-actions bot commented Jun 14, 2024 •

edited

Loading

farzonl commented Jun 26, 2024 •

edited

Loading

farzonl commented Jun 26, 2024 •

edited

Loading